In [1]:
import plotly.express as px
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
In [2]:
# EV market catalogue; preview the first seven rows.
ds1 = pd.read_csv(filepath_or_buffer='EV_Market.csv')
ds1.head(n=7)
Out[2]:
Brand Model AccelSec TopSpeed_KmH Range_Km Efficiency_WhKm FastCharge_KmH RapidCharge PowerTrain PlugType BodyStyle Segment Seats PriceEuro
0 Tesla Model 3 Long Range Dual Motor 4.6 233 450 161 940 Yes AWD Type 2 CCS Sedan D 5 55480
1 Volkswagen ID.3 Pure 10.0 160 270 167 250 No RWD Type 2 CCS Hatchback C 5 30000
2 Polestar 2 4.7 210 400 181 620 Yes AWD Type 2 CCS Liftback D 5 56440
3 BMW iX3 6.8 180 360 206 560 Yes RWD Type 2 CCS SUV D 5 68040
4 Honda e 9.5 145 170 168 190 Yes RWD Type 2 CCS Hatchback B 4 32997
5 Lucid Air 2.8 250 610 180 620 Yes AWD Type 2 CCS Sedan F 5 105000
6 Volkswagen e-Golf 9.6 150 190 168 220 No FWD Type 2 CCS Hatchback C 5 31900
In [3]:
# State-wise electric / non-electric vehicle totals; preview the first seven rows.
ds2 = pd.read_csv(filepath_or_buffer='EV_India.csv')
ds2.head(n=7)
Out[3]:
Sr. No. State Name Total Electric Vehicle Total Non-Electric Vehicle Total
0 1 Andaman & Nicobar Island 162 1,46,945 1,47,107
1 2 Andra Pradesh NaN NaN NaN
2 3 Arunachal Pradesh 20 2,52,965 2,52,985
3 4 Assam 64766 46,77,053 47,41,819
4 5 Bihar 83335 1,04,07,078 1,04,90,413
5 6 Chandigarh 2812 7,46,881 7,49,693
6 7 Chhattisgarh 20966 68,36,200 68,57,166
In [4]:
# Monthly EV sales split by vehicle category; preview the first seven rows.
ds3 = pd.read_csv(filepath_or_buffer='EV_Sales.csv')
ds3.head(n=7)
Out[4]:
YEAR 2 W 3 W 4 W BUS TOTAL
0 Apr-17 96 4748 198 0 5042
1 May-17 91 6720 215 2 7028
2 Jun-17 137 7178 149 1 7465
3 Jul-17 116 8775 120 0 9011
4 Aug-17 99 8905 137 0 9141
5 Sep-17 109 7414 193 0 7716
6 Oct-17 160 7250 214 0 7624
In [5]:
# Per-car listing data (fuel type, mileage, engine, power, ...); preview the first seven rows.
ds4 = pd.read_csv(filepath_or_buffer='EV_FP.csv')
ds4.head(n=7)
Out[5]:
Name Location Year Kilometers_Driven Fuel_Type Transmission Owner_Type Mileage Engine Power Seats
0 Maruti Alto K10 LXI CNG Delhi 2014 40929 CNG Manual First 32.26 km/kg 998 CC 58.2 bhp 4.0
1 Maruti Alto 800 2016-2019 LXI Coimbatore 2013 54493 Petrol Manual Second 24.7 kmpl 796 CC 47.3 bhp 5.0
2 Toyota Innova Crysta Touring Sport 2.4 MT Mumbai 2017 34000 Diesel Manual First 13.68 kmpl 2393 CC 147.8 bhp 7.0
3 Toyota Etios Liva GD Hyderabad 2012 139000 Diesel Manual First 23.59 kmpl 1364 CC null bhp 5.0
4 Hyundai i20 Magna Mumbai 2014 29000 Petrol Manual First 18.5 kmpl 1197 CC 82.85 bhp 5.0
5 Mahindra XUV500 W8 2WD Coimbatore 2016 85609 Diesel Manual Second 16.0 kmpl 2179 CC 140 bhp 7.0
6 Toyota Fortuner 4x2 AT TRD Sportivo Pune 2015 59000 Diesel Automatic First 12.55 kmpl 2982 CC 168.7 bhp 7.0
In [6]:
# Report the (rows, columns) shape of every loaded dataset.
for label, frame in (('ds1', ds1), ('ds2', ds2), ('ds3', ds3), ('ds4', ds4)):
    print(f'No.of rows and columns for {label}: ', frame.shape)
No.of rows and columns for ds1:  (103, 14)
No.of rows and columns for ds2:  (36, 5)
No.of rows and columns for ds3:  (74, 6)
No.of rows and columns for ds4:  (7253, 11)
In [7]:
# List the column index of every loaded dataset.
for label, frame in (('ds1', ds1), ('ds2', ds2), ('ds3', ds3), ('ds4', ds4)):
    print(f"Column names of {label}: ", frame.columns)
Column names of ds1:  Index(['Brand', 'Model', 'AccelSec', 'TopSpeed_KmH', 'Range_Km',
       'Efficiency_WhKm', 'FastCharge_KmH', 'RapidCharge', 'PowerTrain',
       'PlugType', 'BodyStyle', 'Segment', 'Seats', 'PriceEuro'],
      dtype='object')
Column names of ds2:  Index(['Sr. No.', 'State Name', 'Total Electric Vehicle',
       'Total Non-Electric Vehicle', 'Total'],
      dtype='object')
Column names of ds3:  Index(['YEAR', '2 W', '3 W', '4 W', 'BUS', 'TOTAL'], dtype='object')
Column names of ds4:  Index(['Name', 'Location', 'Year', 'Kilometers_Driven', 'Fuel_Type',
       'Transmission', 'Owner_Type', 'Mileage', 'Engine', 'Power', 'Seats'],
      dtype='object')
In [44]:
import warnings
import sweetviz as sv

# Silences every warning category for the rest of the kernel session.
warnings.filterwarnings(action="ignore")

# Build the automated EDA report for ds1 and write it to an HTML file.
report_path = "sweetviz_report.html"
report = sv.analyze(ds1)
report.show_html(report_path)
Done! Use 'show' commands to display/save.   |█████████████████████████████████████████| [100%]   00:02 -> (00:00 left)
Report sweetviz_report.html was generated! NOTEBOOK/COLAB USERS: the web browser MAY not pop up, regardless, the report IS saved in your notebook/colab files.
In [9]:
# Automated EDA report for ds2, written to an HTML file.
report2_path = "sweetviz_report2.html"
report2 = sv.analyze(ds2)
report2.show_html(report2_path)
Done! Use 'show' commands to display/save.   |█████████████████████████████████████████| [100%]   00:01 -> (00:00 left)
Report sweetviz_report2.html was generated! NOTEBOOK/COLAB USERS: the web browser MAY not pop up, regardless, the report IS saved in your notebook/colab files.
In [10]:
# Automated EDA report for ds3, written to an HTML file.
report3_path = "sweetviz_report3.html"
report3 = sv.analyze(ds3)
report3.show_html(report3_path)
Done! Use 'show' commands to display/save.   |█████████████████████████████████████████| [100%]   00:01 -> (00:00 left)
Report sweetviz_report3.html was generated! NOTEBOOK/COLAB USERS: the web browser MAY not pop up, regardless, the report IS saved in your notebook/colab files.
In [11]:
# Automated EDA report for ds4, written to an HTML file.
report4_path = "sweetviz_report4.html"
report4 = sv.analyze(ds4)
report4.show_html(report4_path)
Done! Use 'show' commands to display/save.   |█████████████████████████████████████████| [100%]   00:01 -> (00:00 left)
Report sweetviz_report4.html was generated! NOTEBOOK/COLAB USERS: the web browser MAY not pop up, regardless, the report IS saved in your notebook/colab files.
In [12]:
# Mean acceleration time (AccelSec) per brand, brands listed alphabetically.
# Sort the frame itself: a separately sorted Series passed next to `data=` is not
# a reliable way to order the bars, since seaborn may align it to the frame's index.
accel_by_brand = ds1.sort_values('Brand', ascending=True)

plt.figure(figsize=(5,5))
sns.barplot(x='AccelSec', y='Brand', data=accel_by_brand, palette="muted", ci=None)
# The x axis shows AccelSec, not price.
plt.xlabel('Acceleration (sec)', family='serif', fontsize=15, labelpad=15)
plt.ylabel('Brands', family='serif', fontsize=15, labelpad=15)
plt.title(label="India's Electric Vehicle Acceleration", weight=200, family='sans-serif', size=15, pad=15)
plt.show()
In [13]:
# Donut chart of how many models fall into each market segment.
ds1['Segment'].value_counts().plot.pie(radius=2, cmap='magma', startangle=0, textprops=dict(family='serif'), pctdistance=.5)
# A single white wedge drawn on top punches the hole of the donut.
# `colors` must be a sequence; a bare 'white' string only works because its
# first character 'w' happens to be a valid colour code.
plt.pie(x=[1], radius=1.2, colors=['white'])
plt.title(label='Electric Vehicles in India: Different Segments', family='sans-serif', size=15, pad=80)
plt.ylabel('')
plt.show()
In [14]:
# Number of catalogue rows (models) per brand.
sns.catplot(kind='count', x='Brand', data=ds1, height=6, aspect=2, palette='dark')
# Keep the top and right spines so the plot is fully boxed.
sns.despine(top=False, right=False)
plt.title("A brand's total number of EV models manufactured", family='sans-serif', size=19, pad=15)
plt.xlabel('Brand',family='serif', size=15)
plt.ylabel('Count', family='serif', size=19)
plt.tick_params(axis='x', rotation=40)
plt.show()
In [15]:
# Donut chart of how many models exist per body style.
ds1['BodyStyle'].value_counts().plot.pie(radius=2, startangle=0, textprops=dict(family='serif'), cmap='inferno')
# A single white wedge drawn on top punches the hole of the donut.
# `colors` must be a sequence; a bare 'white' string only works because its
# first character 'w' happens to be a valid colour code.
plt.pie(x=[1], radius=1.2, colors=['white'])
plt.title(label='Body Styles of Electric Vehicles in India', family='sans-serif', size=15, pad=110)
plt.ylabel('')
plt.show()
In [16]:
# How many models offer each seating capacity.
seat_ax = sns.countplot(x='Seats', data=ds1, palette='plasma')
seat_ax.set_title('Electric Vehicles with Different Seating Capacity in India', family='sans-serif', size=15, pad=15)
seat_ax.set_xlabel('Number of Seats', family='serif', size=12, labelpad=12)
seat_ax.set_ylabel('Count', family='serif', size=12, labelpad=12)
plt.show()
In [17]:
# Mean number of seats per brand.
sns.catplot(kind='bar', data=ds1, x='Brand', y='Seats', palette='inferno', ci=None, height=6, aspect=2)
# Keep the top and right spines so the plot is fully boxed.
sns.despine(right=False, top=False)
plt.tick_params(axis='x', rotation=40)
plt.xlabel('Brand',family='serif', size=16, labelpad=12)
plt.ylabel('Number of Seats', family='serif', size=16, labelpad=12)
plt.xticks(family='serif')
plt.yticks(family='serif')
plt.title('Brand-specific analysis of seat numbers', family='sans-serif', size=19, pad=15)
# Render explicitly so the cell does not echo the Text(...) repr of the title.
plt.show()
Out[17]:
Text(0.5, 1.0, 'Brand-specific analysis of seat numbers')
In [18]:
# Horizontal bars: number of models per plug type.
plug_counts = ds1['PlugType'].value_counts().sort_values(ascending=False)
plug_ax = plug_counts.plot.barh()
plug_ax.set_title('Plug Types for Electric Vehicles in India', family='sans-serif', size=15, pad=15)
plug_ax.set_xlabel('Count', family='serif', size=12, labelpad=14)
plug_ax.set_ylabel('Plug Type', family='serif', size=12, labelpad=14)
plt.show()
In [19]:
# Price of every model, plotted against its row position in ds1.
price_series = ds1['PriceEuro']
plt.plot(price_series, color='violet')
plt.title('Price Comparison', family='sans-serif', size=15, pad=12)
plt.xlabel('Number of Samples', family='serif', size=15, labelpad=12)
plt.ylabel('Price', family='serif', size=15, labelpad=12);
In [20]:
# Mean top speed per brand as horizontal bars.
plt.figure(figsize=(8, 8))
speed_ax = sns.barplot(x='TopSpeed_KmH', y='Brand', data=ds1, palette='dark', ci=None)
speed_ax.set_title('Brand-wise Speed Comparison of EVs in India', family='serif', size=15, pad=12)
speed_ax.set_xlabel('Max Speed', family='serif', size=15)
speed_ax.set_ylabel('Brand', family='serif', size=15)
plt.show()
In [21]:
# Mean driving range (Range_Km) per brand.
sns.catplot(kind='bar', data=ds1, x='Brand', y='Range_Km', palette='muted', ci=None, height=6, aspect=2)
# Keep the top and right spines so the plot is fully boxed.
sns.despine(right=False, top=False)
plt.tick_params(axis='x', rotation=40)
plt.xlabel('Brand',family='serif', size=15, labelpad=12)
plt.ylabel('Range', family='serif', size=15, labelpad=12)
plt.title('Brand-wise Analysis of the Range Parameter', family='serif', size=19, pad=15)
# Render explicitly so the cell does not echo the Text(...) repr of the title.
plt.show()
Out[21]:
Text(0.5, 1.0, 'Brand-wise Analysis of the Range Parameter')
In [22]:
# The count columns contain Indian-style grouped numbers such as "1,46,945", so
# pandas reads them as text. Strip the separators and convert to numbers; plotting
# the raw strings gives a categorical y axis instead of a quantitative one.
vehicle_cols = ['Total Electric Vehicle', 'Total Non-Electric Vehicle']
ds2_counts = ds2.copy()
ds2_counts[vehicle_cols] = ds2_counts[vehicle_cols].apply(
    lambda col: pd.to_numeric(col.astype(str).str.replace(',', '', regex=False), errors='coerce')
)

# Long format: one row per (state, vehicle type).
melt_ds2 = ds2_counts.melt(id_vars='State Name', value_vars=vehicle_cols, var_name='Vehicle Type', value_name='Count')

# Keep every 4th state BEFORE melting so both vehicle types always cover the same
# states, whatever the number of rows in ds2.
reduced_ds2 = ds2_counts.iloc[::4].melt(id_vars='State Name', value_vars=vehicle_cols, var_name='Vehicle Type', value_name='Count')

plt.figure(figsize=(9,6))
# With numeric counts the y axis is already ascending, so no axis inversion is needed.
sns.barplot(x='State Name', y='Count', hue='Vehicle Type', data=reduced_ds2, palette='plasma')
plt.xlabel('State')
plt.ylabel('Count')
plt.title('Comparison of Electric and Non-Electric Vehicles by State (Downsampled)')
plt.legend(title='Vehicle Type')
plt.show()
In [23]:
# Every 4th state; left unmodified because the following cell reuses this frame.
reduced_ds2 = ds2.iloc[::4]

# The column may be parsed as text (comma-grouped numbers such as "1,46,945" occur
# in this file), so coerce it to numeric on a plotting copy. The conversion is a
# no-op when the column is already numeric.
ev_by_state = reduced_ds2.assign(**{
    'Total Electric Vehicle': pd.to_numeric(
        reduced_ds2['Total Electric Vehicle'].astype(str).str.replace(',', '', regex=False),
        errors='coerce',
    )
})

plt.figure(figsize=(9, 6))
# With numeric counts the y axis is already ascending, so no axis inversion is needed.
sns.barplot(x='State Name', y='Total Electric Vehicle', data=ev_by_state, palette='viridis')
plt.xlabel('State')
plt.ylabel('Total Electric Vehicle')
plt.title('Total Electric Vehicle by State (Downsampled)')
plt.show()
In [24]:
# 'Total Non-Electric Vehicle' holds comma-grouped strings such as "1,46,945".
# Convert it to numbers on a plotting copy so the bar heights are quantitative.
# The conversion is a no-op when the column is already numeric.
non_ev_by_state = reduced_ds2.assign(**{
    'Total Non-Electric Vehicle': pd.to_numeric(
        reduced_ds2['Total Non-Electric Vehicle'].astype(str).str.replace(',', '', regex=False),
        errors='coerce',
    )
})

plt.figure(figsize=(9,6))
# With numeric counts the y axis is already ascending, so no axis inversion is needed.
sns.barplot(x='State Name', y='Total Non-Electric Vehicle', data=non_ev_by_state, palette='dark')
plt.xlabel('State')
plt.ylabel('Total Non-Electric Vehicle')
plt.title('Total Non-Electric Vehicle by State (Downsampled)')
plt.show()
In [25]:
# Two-wheeler sales per period. ds3 is loaded earlier in the notebook and
# plotly.express is imported in the first cell, so neither is repeated here.
fig = px.line(ds3, x='YEAR', y='2 W', title='Year-wise Trend', labels={'YEAR': 'YEAR', '2 W': '2 W'})
fig.show()
In [26]:
# Three-wheeler sales per period.
series_col = '3 W'
fig = px.line(ds3, x='YEAR', y=series_col, title='Year-wise Trend', labels={'YEAR': 'YEAR', series_col: series_col})
fig.show()
In [27]:
# Four-wheeler sales per period.
series_col = '4 W'
fig = px.line(ds3, x='YEAR', y=series_col, title='Year-wise Trend', labels={'YEAR': 'YEAR', series_col: series_col})
fig.show()
In [28]:
# Bus sales per period.
series_col = 'BUS'
fig = px.line(ds3, x='YEAR', y=series_col, title='Year-wise Trend', labels={'YEAR': 'YEAR', series_col: series_col})
fig.show()
In [29]:
# Take every 850th listing so the plot stays readable.
SAMPLE_STEP = 850
reduced_ds4 = ds4.iloc[::SAMPLE_STEP]

# 'Mileage' is stored as text with a unit suffix (e.g. "32.26 km/kg", "24.7 kmpl").
# Extract the leading number so the x axis is quantitative instead of a list of
# string categories. Values without a number become NaN.
# NOTE(review): units are mixed (km/kg vs kmpl), so values are not strictly comparable.
mileage_numeric = pd.to_numeric(
    reduced_ds4['Mileage'].astype(str).str.extract(r'(\d+(?:\.\d+)?)', expand=False),
    errors='coerce',
)
mileage_sample = reduced_ds4.assign(Mileage=mileage_numeric)

plt.figure(figsize=(10, 6))
sns.swarmplot(x='Mileage', y='Name', data=mileage_sample, palette='inferno')
plt.xlabel('Mileage', fontsize=12)
# NOTE(review): the preview of ds4 shows CNG/petrol/diesel cars - confirm the "EV" wording.
plt.ylabel('Name of EV', fontsize=12)
plt.title('Mileage of EVs in India', fontsize=15)
plt.show()
In [30]:
# Integer codes for the two categorical features used in clustering.
POWERTRAIN_CODES = {'RWD': 0, 'FWD': 1, 'AWD': 2}
RAPID_CHARGE_CODES = {'No': 0, 'Yes': 1}

# Assign the result back instead of `ds1[col].replace(..., inplace=True)`: in-place
# calls on a selected column are deprecated and do not update the frame under
# pandas copy-on-write. Re-running the cell is safe (already-encoded values pass
# through unchanged).
ds1['PowerTrain'] = ds1['PowerTrain'].replace(POWERTRAIN_CODES).astype('int64')
ds1['RapidCharge'] = ds1['RapidCharge'].replace(RAPID_CHARGE_CODES).astype('int64')

feature_cols = ['AccelSec', 'TopSpeed_KmH', 'Efficiency_WhKm', 'FastCharge_KmH', 'Range_Km',
                'RapidCharge', 'Seats', 'PriceEuro', 'PowerTrain']
X = ds1[feature_cols]

# Standardise so every feature contributes on the same scale before PCA.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Keep as many components as there are features (9).
pca = PCA(n_components=len(feature_cols))
X_pca = pca.fit_transform(X_scaled)

# Name the columns PC1..PCn from the actual number of components produced.
df_pca = pd.DataFrame(X_pca, columns=[f'PC{i}' for i in range(1, X_pca.shape[1] + 1)])
df_pca.head(7)
Out[30]:
PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9
0 2.429225 -0.554599 -1.147772 -0.882791 0.839988 -0.959297 0.998880 0.711148 -0.396662
1 -2.322483 -0.345449 0.896473 -1.305529 0.079598 0.235116 -0.213678 -0.544135 -0.181867
2 1.587851 0.008899 -0.650523 0.041024 0.593537 -0.698248 0.058718 0.248837 -0.202775
3 0.291018 -0.000150 -0.307702 -0.514196 -1.608861 0.291624 0.364999 -0.235543 0.261663
4 -2.602679 -0.626489 -0.888088 0.585294 -0.802108 0.027387 -0.084955 -0.507790 -0.049904
5 3.429398 -0.673183 -0.731118 -0.463163 0.563761 0.323336 -0.871201 0.039206 -0.029359
6 -2.232736 -0.044259 1.091770 -0.364093 0.989476 -0.221604 -0.070239 -0.557169 0.030956
In [31]:
# Elbow method: within-cluster sum of squares (KMeans inertia) for k = 1..10.
cluster_range = range(1, 11)
wcss = [
    KMeans(n_clusters=k, init='k-means++', random_state=90).fit(X_pca).inertia_
    for k in cluster_range
]

plt.figure(figsize=(6,6))
plt.title('Plot of the Elbow Method', size=15, family='serif')
plt.plot(cluster_range, wcss, color= "magenta")
plt.xticks(cluster_range, family='serif')
plt.yticks(family='serif')
# Fixed typo in the axis label ("Custers" -> "Clusters").
plt.xlabel('Number of Clusters (K)', family='serif')
plt.ylabel('WCSS', family='serif')
plt.grid()
plt.tick_params(axis='both', direction='inout', length=6, color='purple', grid_color='lightgray', grid_linestyle='--')
plt.show()
In [40]:
# Final model: 3 clusters on the PCA-transformed features.
# fit() returns the estimator itself, so it can be chained.
kmean = KMeans(n_clusters=3, init='k-means++', random_state=90).fit(X_pca)
kmean
Out[40]:
KMeans(n_clusters=3, random_state=90)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
KMeans(n_clusters=3, random_state=90)
In [41]:
# Cluster index assigned to each row of X_pca.
cluster_labels = kmean.labels_
print(cluster_labels)
[2 0 1 0 0 2 0 0 0 1 1 0 0 1 0 0 2 0 0 0 0 1 0 2 2 0 0 1 0 0 1 0 0 1 0 0 0
 0 0 1 2 0 1 0 0 0 0 2 2 0 1 2 0 0 1 0 0 0 0 2 0 1 1 1 0 2 0 1 0 1 0 1 2 1
 0 0 1 0 1 2 0 1 0 0 1 0 1 1 1 0 1 0 0 1 0 0 0 0 0 1 1 1 1]
In [42]:
# Number of rows assigned to each cluster.
label_series = pd.Series(kmean.labels_)
label_series.value_counts()
Out[42]:
0    58
1    32
2    13
Name: count, dtype: int64
In [43]:
# Attach the cluster assignment to the original rows.
ds1['clusters'] = kmean.labels_

# Components shown on the two axes. The centroid coordinates are looked up from
# these same names so the markers always match the plotted plane (previously the
# y axis was PC9 while the centroids used the PC2 coordinate).
# NOTE(review): PC9 is the last component; switch y_pc to 'PC2' if the leading
# plane was intended - this is the only line that needs to change.
x_pc, y_pc = 'PC1', 'PC9'
x_idx = df_pca.columns.get_loc(x_pc)
y_idx = df_pca.columns.get_loc(y_pc)

plt.figure(figsize=(8,6))
sns.scatterplot(data=df_pca, x=x_pc, y=y_pc, s=70, hue=kmean.labels_, palette='muted', zorder=2, alpha=.9)
# zorder=3 keeps the centroid markers above the data points.
plt.scatter(x=kmean.cluster_centers_[:, x_idx], y=kmean.cluster_centers_[:, y_idx], marker="*", c="black", s=80, label="centroids", zorder=3)
plt.xlabel(x_pc, family='serif', size=12, labelpad=12)
plt.ylabel(y_pc, family='serif', size=12,labelpad=12)
plt.grid()
plt.tick_params(grid_color='lightgray', grid_linestyle='--', zorder=1)
plt.legend(title='Labels', fancybox=True, shadow=True)
plt.title('K-Means Clustering', family='serif', size=15,pad=12)
plt.show()
In [ ]: